Load libraries

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.2     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggpubr)
library(ggsci)
library(ggExtra)
library(patchwork)
# Pin select()/mutate() to the dplyr versions so a later-attached package
# cannot mask them.
select <- dplyr::select
mutate <- dplyr::mutate

Load Data

Load pre-processed vcf data and metadata

# Read the pre-processed per-allele table (one row per SAMPLE x POS x ALLELE)
# and preview the first rows.
rds_path <- "../data/processed_data/christos_2021_NGmerge.rds"
df_seq <- readRDS(rds_path)
head(df_seq)
SAMPLE POS ID REF ALT QUAL FILTER VCF_INFO FORMAT ALLELE FREQ MUT_TYPE READ_DEPTH EDIT_TYPE EDITOR SITE REPLICATE Kreads >=Q30
P20657_1010 7 . A <*> 0 . DP=10;I16=1,0,0,0,14,196,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 PL:AD A 0 REF 0 reference AID A 1 183.45 85.82
P20657_1010 7 . A <*> 0 . DP=10;I16=1,0,0,0,14,196,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 PL:AD D 0 SNP 0 A_to_D AID A 1 183.45 85.82
P20657_1018 7 . A <*> 0 . DP=10;I16=1,0,0,0,14,196,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 PL:AD A 0 REF 0 reference AID C 3 853.00 97.40
P20657_1018 7 . A <*> 0 . DP=10;I16=1,0,0,0,14,196,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 PL:AD D 0 SNP 0 A_to_D AID C 3 853.00 97.40
P20657_1041 7 . A <*> 0 . DP=10;I16=1,0,0,0,14,196,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 PL:AD A 0 REF 0 reference Nish B 2 159.23 96.26
P20657_1041 7 . A <*> 0 . DP=10;I16=1,0,0,0,14,196,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 PL:AD D 0 SNP 0 A_to_D Nish B 2 159.23 96.26

Plot non-normalized data

Let’s begin by plotting read depth across the sequence per sample

# Median READ_DEPTH per sample; only the first rows are shown.
# summarise() over a single grouping variable already returns an ungrouped
# tibble, so no explicit ungroup() is needed.
depth_by_sample <- summarise(
  group_by(df_seq, SAMPLE),
  median_read_depth = median(READ_DEPTH)
)
head(depth_by_sample)
SAMPLE median_read_depth
P20657_1001 136
P20657_1002 196
P20657_1003 125
P20657_1004 123
P20657_1005 61
P20657_1006 22
# Read depth at every position for all samples; colour encodes EDITOR.
ggplot(
  df_seq,
  aes(x = POS, y = READ_DEPTH, group = SAMPLE, color = EDITOR)
) +
  geom_point() +
  labs(x = "CAN1 position", y = "Read depth") +
  theme_bw() +
  theme(legend.position = "bottom")

# Same read-depth plot restricted to EDITOR == "2x"; colour encodes REPLICATE.
df_seq %>%
  filter(EDITOR == "2x") %>%
  ggplot(aes(x = POS, y = READ_DEPTH, group = SAMPLE, color = REPLICATE)) +
  geom_point() +
  labs(x = "CAN1 position", y = "Read depth") +
  theme_bw() +
  theme(legend.position = "bottom")

Let’s look at the amount of mutations per sample, and compare this to read depth

# Per-sample averages of READ_DEPTH and FREQ.
# The columns hold means (mean() is what is computed), so they are named
# mean_* rather than median_*.
# NOTE(review): FREQ is averaged over every allele row, including the
# reference-allele rows (MUT_TYPE == "REF"), so this is not a mutation-only
# figure -- confirm whether a MUT_TYPE == "SNP" filter was intended.
df_seq %>%
  group_by(SAMPLE) %>%
  summarise(
    mean_read_depth = mean(READ_DEPTH),
    mean_mutation_freq = mean(FREQ)
  ) %>%
  ungroup()
SAMPLE mean_read_depth mean_mutation_freq
P20657_1001 57210.1531 14302.54198
P20657_1002 78848.9830 19712.24767
P20657_1003 34490.6209 8622.65818
P20657_1004 68929.2964 17232.32824
P20657_1005 61622.9601 15405.74300
P20657_1006 122290.5891 30572.65734
P20657_1007 55732.1781 13933.04453
P20657_1008 104411.5038 26102.87659
P20657_1009 137252.6862 34313.17303
P20657_1010 35718.5182 8929.63232
P20657_1011 39823.2044 9955.80492
P20657_1012 27889.5055 6972.37701
P20657_1013 143.1450 35.78626
P20657_1014 46253.4707 11563.37235
P20657_1015 116956.9712 29239.25106
P20657_1016 161131.9432 40282.98685
P20657_1017 543.3656 135.84139
P20657_1018 192380.2010 48095.05174
P20657_1019 33629.9444 8407.49109
P20657_1020 48188.3206 12047.08185
P20657_1021 36623.0492 9155.76463
P20657_1022 49686.3198 12421.58439
P20657_1023 33060.8104 8265.20738
P20657_1024 37720.4288 9430.11069
P20657_1025 166698.4003 41674.60093
P20657_1026 154900.1247 38725.03308
P20657_1027 63063.1807 15765.79517
P20657_1028 48407.9249 12101.98219
P20657_1029 53170.5674 13292.64249
P20657_1030 39140.3715 9785.09372
P20657_1031 32086.0199 8021.50763
P20657_1032 55059.6187 13764.91009
P20657_1033 35040.1662 8760.04326
P20657_1034 145882.9779 36470.74640
P20657_1035 192352.8753 48088.21968
P20657_1036 93333.7294 23333.43299
P20657_1037 48340.8469 12085.21332
P20657_1038 70347.0335 17586.76124
P20657_1039 56317.3363 14079.33545
P20657_1040 29928.4822 7482.12256
P20657_1041 48556.2642 12139.07082
P20657_1042 34413.6293 8603.40967
P20657_1043 99475.5004 24868.87532
P20657_1044 135857.8991 33964.47498
P20657_1045 135367.4037 33841.85199
P20657_1046 13360.9822 3340.24597
P20657_1047 79826.4169 19956.60517
P20657_1048 55364.2498 13841.06404
P20657_1049 58121.8202 14530.45929
P20657_1050 49723.5030 12430.88041
P20657_1051 46804.9534 11701.24088
P20657_1052 35318.2655 8829.56658
P20657_1053 41236.1137 10309.02884
P20657_1054 19853.9779 4963.49491
P20657_1055 42293.0017 10573.25064
P20657_1056 29868.1289 7467.03308
P20657_1057 47496.4228 11874.10941
P20657_1058 30180.1506 7545.04029
P20657_1059 20493.5488 5123.38889
P20657_1060 62136.8584 15534.21459
P20657_1061 75452.3011 18863.07591
P20657_1062 43026.4928 10756.62341
P20657_1063 34489.6476 8622.41264
P20657_1064 36297.0407 9074.26209
P20657_1065 27371.7578 6842.94020
P20657_1066 47682.8469 11920.71501

Data pre-processing

Adjust for read depth and end regions

Remove low coverage regions

# Keep only allele rows whose position has more than 5000 reads.
min_read_depth <- 5000
df_seq <- filter(df_seq, READ_DEPTH > min_read_depth)

head(df_seq)
SAMPLE POS ID REF ALT QUAL FILTER VCF_INFO FORMAT ALLELE FREQ MUT_TYPE READ_DEPTH EDIT_TYPE EDITOR SITE REPLICATE Kreads >=Q30
P20657_1010 9 . T G,C,<*> 0 . DP=2959439;I16=2.95674e+06,1,653,0,9.67652e+07,3.17134e+09,9411,136033,1.77404e+08,1.06443e+10,39180,2.3508e+06,846,3998,0,0;QS=59.9979,0.00158485,0.000471937,0;VDB=0;SGB=-10413;RPB=0.999984;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD T 110498 REF 110532 reference AID A 1 183.45 85.82
P20657_1010 9 . T G,C,<*> 0 . DP=2959439;I16=2.95674e+06,1,653,0,9.67652e+07,3.17134e+09,9411,136033,1.77404e+08,1.06443e+10,39180,2.3508e+06,846,3998,0,0;QS=59.9979,0.00158485,0.000471937,0;VDB=0;SGB=-10413;RPB=0.999984;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD G 24 SNP 110532 T_to_G AID A 1 183.45 85.82
P20657_1010 9 . T G,C,<*> 0 . DP=2959439;I16=2.95674e+06,1,653,0,9.67652e+07,3.17134e+09,9411,136033,1.77404e+08,1.06443e+10,39180,2.3508e+06,846,3998,0,0;QS=59.9979,0.00158485,0.000471937,0;VDB=0;SGB=-10413;RPB=0.999984;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD C 10 SNP 110532 T_to_C AID A 1 183.45 85.82
P20657_1010 9 . T G,C,<*> 0 . DP=2959439;I16=2.95674e+06,1,653,0,9.67652e+07,3.17134e+09,9411,136033,1.77404e+08,1.06443e+10,39180,2.3508e+06,846,3998,0,0;QS=59.9979,0.00158485,0.000471937,0;VDB=0;SGB=-10413;RPB=0.999984;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD D 0 SNP 110532 T_to_D AID A 1 183.45 85.82
P20657_1002 9 . T G,C,<*> 0 . DP=2959439;I16=2.95674e+06,1,653,0,9.67652e+07,3.17134e+09,9411,136033,1.77404e+08,1.06443e+10,39180,2.3508e+06,846,3998,0,0;QS=59.9979,0.00158485,0.000471937,0;VDB=0;SGB=-10413;RPB=0.999984;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD T 244018 REF 244093 reference WT A 2 285.05 94.97
P20657_1002 9 . T G,C,<*> 0 . DP=2959439;I16=2.95674e+06,1,653,0,9.67652e+07,3.17134e+09,9411,136033,1.77404e+08,1.06443e+10,39180,2.3508e+06,846,3998,0,0;QS=59.9979,0.00158485,0.000471937,0;VDB=0;SGB=-10413;RPB=0.999984;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD G 62 SNP 244093 T_to_G WT A 2 285.05 94.97

Trim edges of each region

# Keep positions strictly inside one of three windows:
# (52, 155), (730, 840) or (1580, 1681).
df_seq <- df_seq %>%
  filter(
    (POS > 52 & POS < 155) |
      (POS > 730 & POS < 840) |
      (POS > 1580 & POS < 1681)
  )

Normalize by sequencing depth

# Depth-normalise each allele row:
#   NORM_FREQ     = (FREQ + 1) / READ_DEPTH   (the +1 keeps the log finite
#                                              when FREQ is 0)
#   LOG_NORM_FREQ = log2(NORM_FREQ)
# Both expressions are elementwise, so no grouping is required; the previous
# group_by(SAMPLE, POS) only forced a per-group evaluation of the same values.
df_seq <- df_seq %>%
  mutate(
    NORM_FREQ = (FREQ + 1) / READ_DEPTH,
    LOG_NORM_FREQ = log2(NORM_FREQ)
  )

df_seq %>% head()
SAMPLE POS ID REF ALT QUAL FILTER VCF_INFO FORMAT ALLELE FREQ MUT_TYPE READ_DEPTH EDIT_TYPE EDITOR SITE REPLICATE Kreads >=Q30 NORM_FREQ LOG_NORM_FREQ
P20657_1010 53 . A G,T,C 0 . DP=2962705;I16=2.96189e+06,1,450,0,1.17538e+08,4.66631e+09,16848,637412,1.77713e+08,1.06628e+10,27000,1.62e+06,7.40472e+07,1.85118e+09,11250,281250;QS=59.997,0.00246233,0.000407491,0.000115567;VDB=0;SGB=-6749.3;RPB=0.999986;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD A 110641 REF 110670 reference AID A 1 183.45 85.82 0.9997470 -0.0003651
P20657_1010 53 . A G,T,C 0 . DP=2962705;I16=2.96189e+06,1,450,0,1.17538e+08,4.66631e+09,16848,637412,1.77713e+08,1.06628e+10,27000,1.62e+06,7.40472e+07,1.85118e+09,11250,281250;QS=59.997,0.00246233,0.000407491,0.000115567;VDB=0;SGB=-6749.3;RPB=0.999986;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD G 21 SNP 110670 A_to_G AID A 1 183.45 85.82 0.0001988 -12.2964731
P20657_1010 53 . A G,T,C 0 . DP=2962705;I16=2.96189e+06,1,450,0,1.17538e+08,4.66631e+09,16848,637412,1.77713e+08,1.06628e+10,27000,1.62e+06,7.40472e+07,1.85118e+09,11250,281250;QS=59.997,0.00246233,0.000407491,0.000115567;VDB=0;SGB=-6749.3;RPB=0.999986;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD T 6 SNP 110670 A_to_T AID A 1 183.45 85.82 0.0000633 -13.9485497
P20657_1010 53 . A G,T,C 0 . DP=2962705;I16=2.96189e+06,1,450,0,1.17538e+08,4.66631e+09,16848,637412,1.77713e+08,1.06628e+10,27000,1.62e+06,7.40472e+07,1.85118e+09,11250,281250;QS=59.997,0.00246233,0.000407491,0.000115567;VDB=0;SGB=-6749.3;RPB=0.999986;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD C 2 SNP 110670 A_to_C AID A 1 183.45 85.82 0.0000271 -15.1709422
P20657_1002 53 . A G,T,C 0 . DP=2962705;I16=2.96189e+06,1,450,0,1.17538e+08,4.66631e+09,16848,637412,1.77713e+08,1.06628e+10,27000,1.62e+06,7.40472e+07,1.85118e+09,11250,281250;QS=59.997,0.00246233,0.000407491,0.000115567;VDB=0;SGB=-6749.3;RPB=0.999986;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD A 244378 REF 244419 reference WT A 2 285.05 94.97 0.9998363 -0.0002361
P20657_1002 53 . A G,T,C 0 . DP=2962705;I16=2.96189e+06,1,450,0,1.17538e+08,4.66631e+09,16848,637412,1.77713e+08,1.06628e+10,27000,1.62e+06,7.40472e+07,1.85118e+09,11250,281250;QS=59.997,0.00246233,0.000407491,0.000115567;VDB=0;SGB=-6749.3;RPB=0.999986;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD G 35 SNP 244419 A_to_G WT A 2 285.05 94.97 0.0001473 -12.7290719

Calculate relative mutation frequencies

Let’s calculate the frequency of all alternative alleles at each position

# For every SAMPLE x POS, sum NORM_FREQ over the SNP rows and store that total
# on each SNP row; non-SNP rows get NA.
df_seq <- df_seq %>%
  group_by(SAMPLE, POS) %>%
  mutate(
    TOTAL_MUT_FREQ = ifelse(
      MUT_TYPE == "SNP",
      sum(NORM_FREQ[MUT_TYPE == "SNP"]),
      NA
    )
  ) %>%
  ungroup()

# Natural log of the total.
# NOTE(review): LOG_NORM_FREQ is computed with log2, this column with log();
# confirm the difference in base is intended.
df_seq <- mutate(df_seq, LOG_TOTAL_MUT_FREQ = log(TOTAL_MUT_FREQ))

Calculate background subtracted mutation frequencies

First, we calculate the average mutation frequency at each base for the WT samples

# Background per position: mean TOTAL_MUT_FREQ over the SNP rows of the WT
# samples. summarise() over one grouping variable returns an ungrouped tibble.
wt_snp_rows <- filter(df_seq, MUT_TYPE == "SNP" & EDITOR == "WT")
df_mut_freq_wt <- wt_snp_rows %>%
  group_by(POS) %>%
  summarise(MEAN_TOTAL_MUT_FREQ_WT = mean(TOTAL_MUT_FREQ))

# Attach the WT background to every row with the same POS.
df_seq <- full_join(df_seq, df_mut_freq_wt, by = "POS")

Let’s repeat this, but for each individual base substitution (EDIT_TYPE)

# Background per position and substitution: mean NORM_FREQ of each EDIT_TYPE
# over the SNP rows of the WT samples.
df_mut_freq_wt_trans <-
  df_seq %>%
  filter(MUT_TYPE == "SNP",
         EDITOR == "WT") %>%
  group_by(POS, EDIT_TYPE) %>%
  # .groups = "drop" returns an ungrouped tibble directly and avoids the
  # "summarise() has grouped output by 'POS'" message.
  summarise(MEAN_NORM_FREQ_WT = mean(NORM_FREQ), .groups = "drop")

# Attach the background to every row sharing the same POS and EDIT_TYPE.
df_seq <-
  df_seq %>%
  full_join(df_mut_freq_wt_trans, by = c("POS", "EDIT_TYPE"))

Let’s now subtract the background (WT, referred to here as T0) alternative-allele frequency. We define the result, with negative values set to zero, as the mutation enrichment of a given base.

# Keep SNP rows only, subtract the WT background from the total SNP frequency
# and floor the difference at zero (pmax leaves NA as NA, like the ifelse form).
df_seq <- df_seq %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(
    TOTAL_MUT_ENRICH = pmax(TOTAL_MUT_FREQ - MEAN_TOTAL_MUT_FREQ_WT, 0)
  )

Also subtract T0 from the individual substitutions

# Per-substitution enrichment: NORM_FREQ minus the WT mean for the same POS and
# EDIT_TYPE, floored at zero. Only SNP rows are kept.
df_seq <- df_seq %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(MUT_ENRICH = pmax(NORM_FREQ - MEAN_NORM_FREQ_WT, 0))

Analysis of all mutations

WT

# WT samples: TOTAL_MUT_ENRICH along the gene, one panel per SITE, coloured by
# REPLICATE. The anno_rect_baseEditWindow / anno_rect_gRNA / anno_rect_PAM
# layers remain disabled.
df_seq %>%
  filter(EDITOR == "WT", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  ylim(c(0, 0.015)) +
  labs(
    title = "Wild-type",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)

# AID-only samples: TOTAL_MUT_ENRICH along the gene, one panel per SITE,
# coloured by REPLICATE. The anno_rect_* annotation layers remain disabled.
df_seq %>%
  filter(EDITOR == "AID", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  ylim(c(0, 0.015)) +
  labs(
    title = "AID only",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)

Nishida site

# Nishida gRNA samples: TOTAL_MUT_ENRICH per position, faceted by SITE.
# A solid black band spans x 767-769 in the SITE "B" panel
# (presumably the target/PAM location -- confirm).
# The anno_rect_* annotation layers remain disabled.
df_seq %>%
  filter(EDITOR == "Nish", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 767, xmax = 769, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(c(0, 0.015)) +
  labs(
    title = "Nishida gRNA",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)

# Nishida gRNA samples, C->T and A->G substitutions only: LOG_NORM_FREQ per
# position, faceted by SITE and coloured by REPLICATE.
# In the SITE "B" panel a solid band spans x 767-769 and a faint band spans
# x 748-788 (presumably target and surrounding window -- confirm).
# The anno_rect_* annotation layers remain disabled.
# Fix: y-axis label read "muation"; corrected to "mutation".
df_seq %>%
  filter(
    EDITOR == "Nish",
    MUT_TYPE == "SNP",
    EDIT_TYPE %in% c("C_to_T", "A_to_G")
  ) %>%
  ggplot(aes(x = POS, y = LOG_NORM_FREQ, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 767, xmax = 769, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 748, xmax = 788, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  labs(
    title = "Nishida gRNA",
    subtitle = "C->T & A->G only",
    x = "Position on gene",
    y = "Log normalized mutation frequency"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)

# Nishida gRNA samples, substitutions C->T, C->A, G->T and G->A only:
# LOG_NORM_FREQ per position, faceted by SITE and coloured by REPLICATE.
# In the SITE "B" panel a solid band spans x 767-769 and a faint band spans
# x 748-788 (presumably target and surrounding window -- confirm).
# The anno_rect_* annotation layers remain disabled.
# Fixes: y-axis label typo ("muation"), and the subtitle, which said
# "G transitions" although the filter keeps four C/G substitutions (two of
# them transversions); it now lists exactly what is plotted.
df_seq %>%
  filter(
    EDITOR == "Nish",
    MUT_TYPE == "SNP",
    EDIT_TYPE %in% c("C_to_T", "C_to_A", "G_to_T", "G_to_A")
  ) %>%
  ggplot(aes(x = POS, y = LOG_NORM_FREQ, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 767, xmax = 769, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 748, xmax = 788, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  labs(
    title = "Nishida gRNA",
    subtitle = "C->T, C->A, G->T & G->A only",
    x = "Position on gene",
    y = "Log normalized mutation frequency"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)

Compare overall mutation frequency across sites

# Nishida gRNA samples: distribution of TOTAL_MUT_ENRICH per SITE as violins
# (median line drawn), with pairwise comparisons A-B, B-C and A-C.
site_pairs <- list(c("A", "B"), c("B", "C"), c("A", "C"))
df_seq %>%
  filter(EDITOR == "Nish", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = SITE, y = TOTAL_MUT_ENRICH, fill = SITE)) +
  geom_violin(draw_quantiles = 0.5) +
  stat_compare_means(comparisons = site_pairs) +
  labs(
    title = "Nishida gRNA",
    subtitle = "All mutations",
    x = "CAN1 site",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw()
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

# Nishida gRNA samples, C->T and A->G only: distribution of LOG_NORM_FREQ per
# SITE as violins (median line drawn), with pairwise comparisons A-B, B-C, A-C.
site_pairs <- list(c("A", "B"), c("B", "C"), c("A", "C"))
df_seq %>%
  filter(
    EDITOR == "Nish",
    MUT_TYPE == "SNP",
    EDIT_TYPE %in% c("C_to_T", "A_to_G")
  ) %>%
  ggplot(aes(x = SITE, y = LOG_NORM_FREQ, fill = SITE)) +
  geom_violin(draw_quantiles = 0.5) +
  stat_compare_means(comparisons = site_pairs) +
  labs(
    title = "Nishida gRNA",
    subtitle = "C->T & A->G only",
    x = "CAN1 site",
    y = "Log normalized mutation frequency"
  ) +
  theme_bw()
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

gRNA7 site

# gRNA7 samples: TOTAL_MUT_ENRICH per position, faceted by SITE, coloured by
# REPLICATE. In the SITE "A" panel a solid band spans x 108-110 and a faint
# band spans x 89-129 (presumably target and surrounding window -- confirm).
# The anno_rect_* annotation layers remain disabled.
df_seq %>%
  filter(EDITOR == "gRNA7", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "A"),
    aes(xmin = 108, xmax = 110, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "A"),
    aes(xmin = 89, xmax = 129, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(c(0, 0.015)) +
  labs(
    title = "gRNA 7",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)

# gRNA7 samples: TOTAL_MUT_ENRICH per position, faceted by SITE, coloured by
# the reference base (REF). Bands in the SITE "A" panel: solid at x 108-110,
# faint at x 89-129. The anno_rect_* annotation layers remain disabled.
df_seq %>%
  filter(EDITOR == "gRNA7", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REF)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "A"),
    aes(xmin = 108, xmax = 110, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "A"),
    aes(xmin = 89, xmax = 129, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(c(0, 0.015)) +
  labs(
    title = "gRNA 7",
    subtitle = "All mutations, colored by edited base",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)

# gRNA7 samples, C->T and A->G only: LOG_NORM_FREQ per position, faceted by
# SITE, coloured by REPLICATE. Bands in the SITE "A" panel: solid at
# x 108-110, faint at x 89-129. The anno_rect_* layers remain disabled.
df_seq %>%
  filter(
    EDITOR == "gRNA7",
    MUT_TYPE == "SNP",
    EDIT_TYPE %in% c("C_to_T", "A_to_G")
  ) %>%
  ggplot(aes(x = POS, y = LOG_NORM_FREQ, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "A"),
    aes(xmin = 108, xmax = 110, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "A"),
    aes(xmin = 89, xmax = 129, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  labs(
    title = "gRNA 7",
    subtitle = "C->T & A->G only",
    x = "Position on gene",
    y = "Log normalized mutation frequency"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)

Alternative site

# Alternative-gRNA samples: TOTAL_MUT_ENRICH per position, faceted by SITE,
# coloured by REPLICATE. Bands in the SITE "B" panel: solid at x 806-808,
# faint at x 787-827. The anno_rect_* annotation layers remain disabled.
df_seq %>%
  filter(EDITOR == "Altern", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 806, xmax = 808, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 787, xmax = 827, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(c(0, 0.015)) +
  labs(
    title = "Alternative gRNA site",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)

# Alternative-gRNA samples: TOTAL_MUT_ENRICH per position, faceted by SITE,
# coloured by the reference base (REF). Bands in the SITE "B" panel: solid at
# x 806-808, faint at x 787-827. The anno_rect_* layers remain disabled.
df_seq %>%
  filter(EDITOR == "Altern", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REF)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 806, xmax = 808, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 787, xmax = 827, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(c(0, 0.015)) +
  labs(
    title = "Alternative gRNA site",
    subtitle = "All mutations, colored by edited base",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)

# Alternative-gRNA samples, C->T and A->G only: LOG_NORM_FREQ per position,
# faceted by SITE, coloured by REPLICATE; the legend is hidden.
# Bands in the SITE "B" panel: solid at x 806-808, faint at x 787-827.
# The anno_rect_* annotation layers remain disabled.
df_seq %>%
  filter(
    EDITOR == "Altern",
    MUT_TYPE == "SNP",
    EDIT_TYPE %in% c("C_to_T", "A_to_G")
  ) %>%
  ggplot(aes(x = POS, y = LOG_NORM_FREQ, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 806, xmax = 808, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 787, xmax = 827, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  labs(
    title = "Alternative gRNA site",
    subtitle = "C->T & A->G only",
    x = "Position on gene",
    y = "Log normalized mutation frequency"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)

PmCDA1

# PmCDA1 samples: TOTAL_MUT_ENRICH per position, faceted by SITE, coloured by
# REPLICATE. Bands in the SITE "B" panel: a thin solid one at x 806.8-807.2
# and a faint one at x 787-827. The anno_rect_* layers remain disabled.
df_seq %>%
  filter(EDITOR == "PmCDA1", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 806.8, xmax = 807.2, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 787, xmax = 827, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(c(0, 0.015)) +
  labs(
    title = "PmCDA1",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)

# PmCDA1 samples: TOTAL_MUT_ENRICH per position, faceted by SITE, coloured by
# the reference base (REF). Bands in the SITE "B" panel: a thin solid one at
# x 806.8-807.2 and a faint one at x 787-827.
# The anno_rect_* annotation layers remain disabled.
df_seq %>%
  filter(EDITOR == "PmCDA1", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REF)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 806.8, xmax = 807.2, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 787, xmax = 827, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(c(0, 0.015)) +
  labs(
    title = "PmCDA1",
    subtitle = "All mutations, colored by edited base",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)

# PmCDA1 samples, C->T and A->G only: LOG_NORM_FREQ per position, faceted by
# SITE, coloured by REPLICATE. Bands in the SITE "B" panel: a thin solid one
# at x 806.8-807.2 and a faint one at x 787-827.
# The anno_rect_* annotation layers remain disabled.
# Fix: the title was misspelled "PmDCA1"; it now matches the EDITOR value and
# the sibling PmCDA1 plots.
df_seq %>%
  filter(
    EDITOR == "PmCDA1",
    MUT_TYPE == "SNP",
    EDIT_TYPE %in% c("C_to_T", "A_to_G")
  ) %>%
  ggplot(aes(x = POS, y = LOG_NORM_FREQ, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 806.8, xmax = 807.2, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 787, xmax = 827, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  labs(
    title = "PmCDA1",
    subtitle = "C->T & A->G only",
    x = "Position on gene",
    y = "Log normalized mutation frequency"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)

# Alternative-gRNA samples: TOTAL_MUT_ENRICH per position with semi-transparent
# points, faceted by SITE and coloured by REPLICATE. A solid band spans
# x 806-808 in the SITE "B" panel. The anno_rect_* layers remain disabled.
# Fixes: title typo ("Alternatiive"), and removal of the earlier
# theme(legend.position = "none"), which the final theme() call overrode --
# the legend was, and still is, drawn at the bottom.
df_seq %>%
  filter(EDITOR == "Altern", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REPLICATE)) +
  geom_point(alpha = 0.5) +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 806, xmax = 808, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(c(0, 0.015)) +
  labs(
    title = "Alternative gRNA site",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(aspect.ratio = 0.7, legend.position = "bottom")

# gRNA7 samples: TOTAL_MUT_ENRICH per position with semi-transparent points,
# faceted by SITE. A solid band spans x 108-110 in the SITE "A" panel.
# No colour aesthetic is mapped here; the viridis colour scale is kept as in
# the sibling plots. The anno_rect_* annotation layers remain disabled.
df_seq %>%
  filter(EDITOR == "gRNA7", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  geom_point(alpha = 0.5) +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "A"),
    aes(xmin = 108, xmax = 110, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(c(0, 0.015)) +
  labs(
    title = "gRNA 7",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)

Trim edges

# x-axis window shared by the plots of this section.
xlim_nishida <- c(717, 816)

# WT samples: TOTAL_MUT_ENRICH inside the window, no faceting. Points outside
# the x limits are dropped by the scale (hence ggplot's "removed rows"
# warning). A solid band spans x 764-767.
# The anno_rect_* annotation layers remain disabled.
df_seq %>%
  filter(EDITOR == "WT", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  geom_point() +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 764, xmax = 767, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  xlim(xlim_nishida) +
  labs(
    title = "WT",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)
## Warning: Removed 2025 rows containing missing values (geom_point).

# AID-only samples: TOTAL_MUT_ENRICH inside the xlim_nishida window, no
# faceting. Points outside the x limits are dropped by the scale.
# A solid band spans x 764-767. The anno_rect_* layers remain disabled.
df_seq %>%
  filter(EDITOR == "AID", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  geom_point() +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 764, xmax = 767, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  xlim(xlim_nishida) +
  labs(
    title = "AID only",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)
## Warning: Removed 1656 rows containing missing values (geom_point).

# Nishida gRNA samples: TOTAL_MUT_ENRICH inside the xlim_nishida window, no
# faceting. Points outside the x limits are dropped by the scale.
# A solid band spans x 764-767.
# NOTE(review): the faceted Nishida plots draw this band at x 767-769 --
# confirm which coordinates are correct.
df_seq %>%
  filter(EDITOR == "Nish", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  geom_point() +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 764, xmax = 767, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  xlim(xlim_nishida) +
  labs(
    title = "Nishida",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)
## Warning: Removed 2025 rows containing missing values (geom_point).

Check PmCDA1 efficiency

# Re-point the shared x-axis window for the plots of this section.
xlim_nishida <- c(735, 807)

# WT samples: TOTAL_MUT_ENRICH inside the window, no faceting. Points outside
# the x limits are dropped by the scale. A solid band spans x 764-766.
# The anno_rect_* annotation layers remain disabled.
df_seq %>%
  filter(EDITOR == "WT", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  geom_point() +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 764, xmax = 766, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  xlim(xlim_nishida) +
  labs(
    title = "WT",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)
## Warning: Removed 2142 rows containing missing values (geom_point).

# AID-only samples: TOTAL_MUT_ENRICH inside the xlim_nishida window, no
# faceting. Points outside the x limits are dropped by the scale.
# A solid band spans x 764-766. The anno_rect_* layers remain disabled.
df_seq %>%
  filter(EDITOR == "AID", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  geom_point() +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 764, xmax = 766, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  xlim(xlim_nishida) +
  labs(
    title = "AID only",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)
## Warning: Removed 1734 rows containing missing values (geom_point).

# PmCDA1 samples: TOTAL_MUT_ENRICH for x 770-790, no faceting. A thin grey
# band at x 781.9-782.1 is drawn first so the points sit on top of it.
# Points outside the x/y limits are dropped by the scales.
# The black band at x 764-766 and the anno_rect_* layers remain disabled.
df_seq %>%
  filter(EDITOR == "PmCDA1", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 781.9, xmax = 782.1, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "grey", inherit.aes = FALSE
  ) +
  geom_point() +
  xlim(770, 790) +
  ylim(c(0, 0.015)) +
  labs(
    title = "PmCDA1",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)
## Warning: Removed 1164 rows containing missing values (geom_point).

# Print the PmCDA1 SNP rows at positions strictly between 780 and 785.
filter(
  df_seq,
  EDITOR == "PmCDA1",
  MUT_TYPE == "SNP",
  POS > 780,
  POS < 785
)
SAMPLE POS ID REF ALT QUAL FILTER VCF_INFO FORMAT ALLELE FREQ MUT_TYPE READ_DEPTH EDIT_TYPE EDITOR SITE REPLICATE Kreads >=Q30 NORM_FREQ LOG_NORM_FREQ TOTAL_MUT_FREQ LOG_TOTAL_MUT_FREQ MEAN_TOTAL_MUT_FREQ_WT MEAN_NORM_FREQ_WT TOTAL_MUT_ENRICH MUT_ENRICH
P20657_1066 781 . C T,G,A 0 . DP=3163412;I16=3.16247e+06,6,923,0,1.25865e+08,5.01079e+09,35334,1.36205e+06,1.89749e+08,1.13849e+10,55380,3.3228e+06,7.90619e+07,1.97655e+09,23075,576875;QS=65.98,0.018119,0.00120617,0.00070575;VDB=0;SGB=-15609;RPB=0.999216;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD T 15 SNP 140560 C_to_T PmCDA1 B 1 155.45 96.56 0.0001138 -13.100827 0.0001850 -8.595293 0.0001677 0.0001214 0.0000173 0.0000000
P20657_1066 781 . C T,G,A 0 . DP=3163412;I16=3.16247e+06,6,923,0,1.25865e+08,5.01079e+09,35334,1.36205e+06,1.89749e+08,1.13849e+10,55380,3.3228e+06,7.90619e+07,1.97655e+09,23075,576875;QS=65.98,0.018119,0.00120617,0.00070575;VDB=0;SGB=-15609;RPB=0.999216;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD G 4 SNP 140560 C_to_G PmCDA1 B 1 155.45 96.56 0.0000356 -14.778899 0.0001850 -8.595293 0.0001677 0.0000284 0.0000173 0.0000071
P20657_1066 781 . C T,G,A 0 . DP=3163412;I16=3.16247e+06,6,923,0,1.25865e+08,5.01079e+09,35334,1.36205e+06,1.89749e+08,1.13849e+10,55380,3.3228e+06,7.90619e+07,1.97655e+09,23075,576875;QS=65.98,0.018119,0.00120617,0.00070575;VDB=0;SGB=-15609;RPB=0.999216;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD A 4 SNP 140560 C_to_A PmCDA1 B 1 155.45 96.56 0.0000356 -14.778899 0.0001850 -8.595293 0.0001677 0.0000178 0.0000173 0.0000178
P20657_1066 782 . G A,C,T 0 . DP=3163409;I16=3.15948e+06,6,3919,0,1.26129e+08,5.03551e+09,154938,6.13251e+06,1.89569e+08,1.13741e+10,235140,1.41084e+07,7.89872e+07,1.97468e+09,97975,2.44938e+06;QS=65.8904,0.0720559,0.0357055,0.00188342;VDB=0;SGB=-77742.3;RPB=0.98563;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD A 446 SNP 140561 G_to_A PmCDA1 B 1 155.45 96.56 0.0031801 -8.296706 0.0062677 -5.072339 0.0001221 0.0000812 0.0061456 0.0030989
P20657_1066 782 . G A,C,T 0 . DP=3163409;I16=3.15948e+06,6,3919,0,1.26129e+08,5.03551e+09,154938,6.13251e+06,1.89569e+08,1.13741e+10,235140,1.41084e+07,7.89872e+07,1.97468e+09,97975,2.44938e+06;QS=65.8904,0.0720559,0.0357055,0.00188342;VDB=0;SGB=-77742.3;RPB=0.98563;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD C 390 SNP 140561 G_to_C PmCDA1 B 1 155.45 96.56 0.0027817 -8.489812 0.0062677 -5.072339 0.0001221 0.0000165 0.0061456 0.0027652
P20657_1066 782 . G A,C,T 0 . DP=3163409;I16=3.15948e+06,6,3919,0,1.26129e+08,5.03551e+09,154938,6.13251e+06,1.89569e+08,1.13741e+10,235140,1.41084e+07,7.89872e+07,1.97468e+09,97975,2.44938e+06;QS=65.8904,0.0720559,0.0357055,0.00188342;VDB=0;SGB=-77742.3;RPB=0.98563;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD T 42 SNP 140561 G_to_T PmCDA1 B 1 155.45 96.56 0.0003059 -11.674572 0.0062677 -5.072339 0.0001221 0.0000244 0.0061456 0.0002815
P20657_1066 783 . T C,A,G 0 . DP=3163362;I16=3.1628e+06,6,551,0,1.25982e+08,5.01913e+09,20617,779297,1.89768e+08,1.13861e+10,33060,1.9836e+06,7.90702e+07,1.97675e+09,13775,344375;QS=65.986,0.0131302,0.0008268,5.91829e-05;VDB=0;SGB=-9355.51;RPB=0.99991;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD C 21 SNP 140559 T_to_C PmCDA1 B 1 155.45 96.56 0.0001565 -12.641385 0.0001992 -8.521178 0.0002042 0.0001514 0.0000000 0.0000051
P20657_1066 783 . T C,A,G 0 . DP=3163362;I16=3.1628e+06,6,551,0,1.25982e+08,5.01913e+09,20617,779297,1.89768e+08,1.13861e+10,33060,1.9836e+06,7.90702e+07,1.97675e+09,13775,344375;QS=65.986,0.0131302,0.0008268,5.91829e-05;VDB=0;SGB=-9355.51;RPB=0.99991;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD A 3 SNP 140559 T_to_A PmCDA1 B 1 155.45 96.56 0.0000285 -15.100816 0.0001992 -8.521178 0.0002042 0.0000475 0.0000000 0.0000000
P20657_1066 783 . T C,A,G 0 . DP=3163362;I16=3.1628e+06,6,551,0,1.25982e+08,5.01913e+09,20617,779297,1.89768e+08,1.13861e+10,33060,1.9836e+06,7.90702e+07,1.97675e+09,13775,344375;QS=65.986,0.0131302,0.0008268,5.91829e-05;VDB=0;SGB=-9355.51;RPB=0.99991;MQB=1;MQSB=1;BQB=0;MQ0F=0 PL:AD G 1 SNP 140559 T_to_G PmCDA1 B 1 155.45 96.56 0.0000142 -16.100816 0.0001992 -8.521178 0.0002042 0.0000053 0.0000000 0.0000089
P20657_1066 784 . T A,C,G 0 . DP=3163412;I16=3.16294e+06,6,415,0,1.26185e+08,5.03467e+09,15965,617977,1.89777e+08,1.13866e+10,24900,1.494e+06,7.90736e+07,1.97684e+09,10375,259375;QS=65.9798,0.0176772,0.00244375,3.88912e-05;VDB=0;SGB=-7069.3;RPB=0.958516;MQB=1;MQSB=1;BQB=1.03763e-29;MQ0F=0 PL:AD A 1 SNP 140559 T_to_A PmCDA1 B 1 155.45 96.56 0.0000142 -16.100816 0.0001565 -8.762340 0.0001608 0.0000105 0.0000000 0.0000038
P20657_1066 784 . T A,C,G 0 . DP=3163412;I16=3.16294e+06,6,415,0,1.26185e+08,5.03467e+09,15965,617977,1.89777e+08,1.13866e+10,24900,1.494e+06,7.90736e+07,1.97684e+09,10375,259375;QS=65.9798,0.0176772,0.00244375,3.88912e-05;VDB=0;SGB=-7069.3;RPB=0.958516;MQB=1;MQSB=1;BQB=1.03763e-29;MQ0F=0 PL:AD C 18 SNP 140559 T_to_C PmCDA1 B 1 155.45 96.56 0.0001352 -12.852889 0.0001565 -8.762340 0.0001608 0.0001450 0.0000000 0.0000000
P20657_1066 784 . T A,C,G 0 . DP=3163412;I16=3.16294e+06,6,415,0,1.26185e+08,5.03467e+09,15965,617977,1.89777e+08,1.13866e+10,24900,1.494e+06,7.90736e+07,1.97684e+09,10375,259375;QS=65.9798,0.0176772,0.00244375,3.88912e-05;VDB=0;SGB=-7069.3;RPB=0.958516;MQB=1;MQSB=1;BQB=1.03763e-29;MQ0F=0 PL:AD G 0 SNP 140559 T_to_G PmCDA1 B 1 155.45 96.56 0.0000071 -17.100816 0.0001565 -8.762340 0.0001608 0.0000053 0.0000000 0.0000018

2x

# 2x: scatter of TOTAL_MUT_ENRICH against POS for SNP rows, one panel per SITE.
# Layers that are currently switched off (kept for reference):
#   anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM
#   geom_rect(data = data.frame(SITE = "B"), aes(xmin = 764, xmax = 766, ymin = -Inf, ymax = Inf), alpha=1, fill="black", inherit.aes = F)
df_seq %>%
  filter(
    EDITOR == "2x",
    MUT_TYPE == "SNP",
    !is.na(TOTAL_MUT_ENRICH)
  ) %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  geom_point() +
  facet_wrap(~SITE, ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  ylim(0, 0.015) +
  labs(
    title = "2x",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(aspect.ratio = 0.7, legend.position = "none")

# 2x: same scatter of TOTAL_MUT_ENRICH against POS, with points colored by the
# reference base (REF) and the legend placed below the panels.
# Layers that are currently switched off (kept for reference):
#   anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM
#   geom_rect(data = data.frame(SITE = "B"), aes(xmin = 764, xmax = 766, ymin = -Inf, ymax = Inf), alpha=1, fill="black", inherit.aes = F)
df_seq %>%
  filter(
    EDITOR == "2x",
    MUT_TYPE == "SNP",
    !is.na(TOTAL_MUT_ENRICH)
  ) %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REF)) +
  geom_point() +
  facet_wrap(~SITE, ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  ylim(0, 0.015) +
  labs(
    title = "2x",
    subtitle = "All mutations, colored by edited base",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)

3x

# 3x: scatter of TOTAL_MUT_ENRICH against POS for SNP rows, one panel per SITE.
# Layers that are currently switched off (kept for reference):
#   anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM
#   geom_rect(data = data.frame(SITE = "B"), aes(xmin = 764, xmax = 766, ymin = -Inf, ymax = Inf), alpha=1, fill="black", inherit.aes = F)
df_seq %>%
  filter(
    EDITOR == "3x",
    MUT_TYPE == "SNP",
    !is.na(TOTAL_MUT_ENRICH)
  ) %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  geom_point() +
  facet_wrap(~SITE, ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  ylim(0, 0.015) +
  labs(
    title = "3x",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)

# 3x, colored by edited base: scatter of TOTAL_MUT_ENRICH against POS with the
# point color mapped to REF; legend placed below the panels.
# Layers that are currently switched off (kept for reference):
#   anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM
#   geom_rect(data = data.frame(SITE = "B"), aes(xmin = 764, xmax = 766, ymin = -Inf, ymax = Inf), alpha=1, fill="black", inherit.aes = F)
df_seq %>%
  filter(
    EDITOR == "3x",
    MUT_TYPE == "SNP",
    !is.na(TOTAL_MUT_ENRICH)
  ) %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REF)) +
  geom_point() +
  facet_wrap(~SITE, ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  ylim(0, 0.015) +
  labs(
    title = "3x",
    subtitle = "All mutations, colored by edited base",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)

3x & WT together

# WT and 3x side by side: scatter of TOTAL_MUT_ENRICH against POS for SNP rows,
# one panel per SITE x EDITOR combination, colored by EDITOR.
# No y limit is set here, so every point is shown.
df_seq %>%
  filter(EDITOR %in% c("WT", "3x")) %>%
  filter(MUT_TYPE == "SNP") %>%
  filter(!is.na(TOTAL_MUT_ENRICH)) %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = EDITOR)) +
  #anno_rect_baseEditWindow +
  #anno_rect_gRNA +
  #anno_rect_PAM +
  geom_point() +
  theme_bw() +
  facet_wrap(vars(SITE, EDITOR), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  #geom_rect(data = data.frame(SITE = "B"), aes(xmin = 764, xmax = 766, ymin = -Inf, ymax = Inf), alpha=1, fill="black", inherit.aes = F) +
  theme(legend.position = "none") +
  ylab("Proportion of mutated bases (background normalized)") +
  # The data contain both editors, so the title names both (it used to say
  # only "3x").
  ggtitle("3x & WT", subtitle = "All mutations") +
  xlab("Position on gene") +
  theme(aspect.ratio = 0.7)

3x, faceted by replicate

# 3x per replicate: scatter of TOTAL_MUT_ENRICH against POS for SNP rows, one
# panel per SITE x REPLICATE combination, colored by SITE. No y limit is set.
# Layers that are currently switched off (kept for reference):
#   anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM
#   geom_rect(data = data.frame(SITE = "B"), aes(xmin = 764, xmax = 766, ymin = -Inf, ymax = Inf), alpha=1, fill="black", inherit.aes = F)
df_seq %>%
  filter(
    EDITOR %in% c("3x"),
    MUT_TYPE == "SNP",
    !is.na(TOTAL_MUT_ENRICH)
  ) %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = SITE)) +
  geom_point() +
  facet_wrap(~ SITE + REPLICATE, ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  labs(
    title = "3x",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)

Transition/transversion plots

WT

# WT, widest zoom: MUT_ENRICH per observed allele, one panel per reference base.
# Rows kept: EDITOR "WT", MUT_TYPE "SNP", non-missing MUT_ENRICH.
df_seq %>%
  filter(EDITOR %in% c("WT")) %>%
  filter(MUT_TYPE == "SNP") %>%
  #filter(SITE == "A") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only. By default geom_jitter() also jitters y,
  # which pushes points sitting at exactly 0 below the lower axis limit, so a
  # random subset of them is dropped on every render.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  # This chunk used the same 0.025 limit as the next WT chunk, producing the
  # same plot twice; 0.25 matches the 0.25 / 0.025 / 0.0025 zoom series used
  # for the 2x and 3x editors.
  ylim(0, 0.25) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 740 rows containing missing values (geom_point).

# WT, y axis limited to [0, 0.025]: MUT_ENRICH per observed allele, one panel
# per reference base.
# Rows kept: EDITOR "WT", MUT_TYPE "SNP", non-missing MUT_ENRICH.
df_seq %>%
  filter(EDITOR %in% c("WT")) %>%
  filter(MUT_TYPE == "SNP") %>%
  #filter(SITE == "A") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only. By default geom_jitter() also jitters y,
  # which pushes points sitting at exactly 0 below the lower axis limit, so a
  # random subset of them is dropped on every render.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 719 rows containing missing values (geom_point).

# WT, y axis limited to [0, 0.0025]: MUT_ENRICH per observed allele, one panel
# per reference base.
# Rows kept: EDITOR "WT", MUT_TYPE "SNP", non-missing MUT_ENRICH.
df_seq %>%
  filter(EDITOR %in% c("WT")) %>%
  filter(MUT_TYPE == "SNP") %>%
  #filter(SITE == "A") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only. By default geom_jitter() also jitters y,
  # which pushes points sitting at exactly 0 below the lower axis limit, so a
  # random subset of them is dropped on every render.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 734 rows containing missing values (geom_point).

AID

# AID, widest zoom: MUT_ENRICH per observed allele, one panel per reference
# base.
# Rows kept: EDITOR "AID", MUT_TYPE "SNP", non-missing MUT_ENRICH.
df_seq %>%
  filter(EDITOR %in% c("AID")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only. By default geom_jitter() also jitters y,
  # which pushes points sitting at exactly 0 below the lower axis limit, so a
  # random subset of them is dropped on every render.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  # This chunk used the same 0.025 limit as the next AID chunk, producing the
  # same plot twice; 0.25 matches the 0.25 / 0.025 / 0.0025 zoom series used
  # for the 2x and 3x editors.
  ylim(0, 0.25) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 549 rows containing missing values (geom_point).

# AID, y axis limited to [0, 0.025]: MUT_ENRICH per observed allele, one panel
# per reference base.
# Rows kept: EDITOR "AID", MUT_TYPE "SNP", non-missing MUT_ENRICH.
df_seq %>%
  filter(EDITOR %in% c("AID")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only. By default geom_jitter() also jitters y,
  # which pushes points sitting at exactly 0 below the lower axis limit, so a
  # random subset of them is dropped on every render.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 586 rows containing missing values (geom_point).

# AID, y axis limited to [0, 0.0025]: MUT_ENRICH per observed allele, one panel
# per reference base.
# Rows kept: EDITOR "AID", MUT_TYPE "SNP", non-missing MUT_ENRICH.
df_seq %>%
  filter(EDITOR %in% c("AID")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only. By default geom_jitter() also jitters y,
  # which pushes points sitting at exactly 0 below the lower axis limit, so a
  # random subset of them is dropped on every render.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 552 rows containing missing values (geom_point).

2x

# 2x, y axis limited to [0, 0.25]: MUT_ENRICH per observed allele, one panel
# per reference base.
# Rows kept: EDITOR "2x", MUT_TYPE "SNP", non-missing MUT_ENRICH.
df_seq %>%
  filter(EDITOR %in% c("2x")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only. By default geom_jitter() also jitters y,
  # which pushes points sitting at exactly 0 below the lower axis limit, so a
  # random subset of them is dropped on every render.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.25) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 622 rows containing missing values (geom_point).

# 2x, y axis limited to [0, 0.025]: MUT_ENRICH per observed allele, one panel
# per reference base.
# Rows kept: EDITOR "2x", MUT_TYPE "SNP", non-missing MUT_ENRICH.
df_seq %>%
  filter(EDITOR %in% c("2x")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only. By default geom_jitter() also jitters y,
  # which pushes points sitting at exactly 0 below the lower axis limit, so a
  # random subset of them is dropped on every render.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 630 rows containing missing values (geom_point).

# 2x, y axis limited to [0, 0.0025]: MUT_ENRICH per observed allele, one panel
# per reference base.
# Rows kept: EDITOR "2x", MUT_TYPE "SNP", non-missing MUT_ENRICH.
df_seq %>%
  filter(EDITOR %in% c("2x")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only. By default geom_jitter() also jitters y,
  # which pushes points sitting at exactly 0 below the lower axis limit, so a
  # random subset of them is dropped on every render.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 629 rows containing missing values (geom_point).

3x

# 3x, y axis limited to [0, 0.25]: MUT_ENRICH per observed allele, one panel
# per reference base.
# Rows kept: EDITOR "3x", MUT_TYPE "SNP", non-missing MUT_ENRICH.
df_seq %>%
  filter(EDITOR %in% c("3x")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only. By default geom_jitter() also jitters y,
  # which pushes points sitting at exactly 0 below the lower axis limit, so a
  # random subset of them is dropped on every render.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.25) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 569 rows containing missing values (geom_point).

# 3x, y axis limited to [0, 0.025]: MUT_ENRICH per observed allele, one panel
# per reference base.
# Rows kept: EDITOR "3x", MUT_TYPE "SNP", non-missing MUT_ENRICH.
df_seq %>%
  filter(EDITOR %in% c("3x")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only. By default geom_jitter() also jitters y,
  # which pushes points sitting at exactly 0 below the lower axis limit, so a
  # random subset of them is dropped on every render.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 548 rows containing missing values (geom_point).

# 3x, y axis limited to [0, 0.0025]: MUT_ENRICH per observed allele, one panel
# per reference base.
# Rows kept: EDITOR "3x", MUT_TYPE "SNP", non-missing MUT_ENRICH.
df_seq %>%
  filter(EDITOR %in% c("3x")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only. By default geom_jitter() also jitters y,
  # which pushes points sitting at exactly 0 below the lower axis limit, so a
  # random subset of them is dropped on every render.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 565 rows containing missing values (geom_point).

# Build two MUT_ENRICH-per-allele panels (WT, and all gRNA editors pooled) and
# stack them vertically with patchwork's `/` operator.
# In both plots geom_jitter() uses height = 0 (sideways jitter only): the
# default also jitters y, which pushes points sitting at exactly 0 below the
# lower axis limit, so a random subset of them is dropped on every render.

# WT only.
p_trans_wt <-
  df_seq %>%
  filter(EDITOR %in% c("WT")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  geom_jitter(height = 0) +
  theme_bw() +
  ggtitle("WT") +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)") +
  theme(legend.position = "none")

# Editors "Nish", "gRNA7", "Altern", "2x" and "3x" pooled into one plot.
p_trans_gRNA <-
  df_seq %>%
  filter(EDITOR %in% c("Nish", "gRNA7", "Altern", "2x", "3x")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  geom_jitter(height = 0) +
  ggtitle("All gRNAs tabulated") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)") +
  theme(legend.position = "none")

p_trans_wt / p_trans_gRNA
## Warning: Removed 726 rows containing missing values (geom_point).
## Warning: Removed 3436 rows containing missing values (geom_point).

# WT, SITE "A" only: MUT_ENRICH per observed allele, one panel per reference
# base, y axis limited to [0, 0.0025].
df_seq %>%
  filter(EDITOR %in% c("WT")) %>%
  filter(MUT_TYPE == "SNP") %>%
  filter(SITE == "A") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only. By default geom_jitter() also jitters y,
  # which pushes points sitting at exactly 0 below the lower axis limit, so a
  # random subset of them is dropped on every render.
  geom_jitter(height = 0) +
  theme_bw() +
  ggtitle("WT") +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 247 rows containing missing values (geom_point).

# Editors "Nish", "gRNA7", "Altern", "2x" and "3x" pooled, SITE "A" only:
# MUT_ENRICH per observed allele, one panel per reference base, y axis limited
# to [0, 0.0025].
df_seq %>%
  filter(EDITOR %in% c("Nish", "gRNA7", "Altern", "2x", "3x")) %>%
  filter(MUT_TYPE == "SNP") %>%
  filter(SITE == "A") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only. By default geom_jitter() also jitters y,
  # which pushes points sitting at exactly 0 below the lower axis limit, so a
  # random subset of them is dropped on every render.
  geom_jitter(height = 0) +
  ggtitle("All gRNAs tabulated") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 1075 rows containing missing values (geom_point).

Calculate noise threshold

# WT noise floor: boxplot of MUT_ENRICH over all WT SNP rows with the individual
# values overlaid, zoomed to [0, 0.00025].
df_seq %>%
  filter(EDITOR %in% c("WT")) %>%
  filter(MUT_TYPE == "SNP") %>%
  filter(!is.na(MUT_ENRICH)) %>%
  # Constant dummy value so all rows fall into a single box on the x axis
  # (this overwrites the ID column for the plot only).
  mutate(ID = "ID") %>%
  ggplot(aes(y = MUT_ENRICH, x = ID)) +
  # `draw_quantiles` is a geom_violin() argument; geom_boxplot() ignored it
  # with a warning, so it is removed. The box already shows the quartiles.
  geom_boxplot() +
  # height = 0: jitter sideways only, so points at exactly 0 are not pushed
  # below the axis and the plotted values are the real ones.
  geom_jitter(alpha = 0.1, height = 0) +
  theme_bw() +
  ggtitle("WT") +
  # Zoom with coord_cartesian() instead of ylim(): ylim() discards values
  # outside the range before the boxplot statistics are computed.
  coord_cartesian(ylim = c(0, 0.00025)) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Ignoring unknown parameters: draw_quantiles
## Warning: Removed 729 rows containing missing values (geom_point).

Compute the 25% and 99% quantiles of MUT_ENRICH for the WT data (replicate 3 excluded)

# 25% and 99% quantiles of MUT_ENRICH over WT SNP rows, leaving out rows whose
# REPLICATE equals "3". Returns one row per quantile with its percent label.
df_seq %>%
  filter(
    EDITOR %in% c("WT"),
    MUT_TYPE == "SNP",
    REPLICATE != "3"
  ) %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D")),
    ID = "ID"
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  summarise(
    quantile = scales::percent(c(0.25, 0.99)),
    MUT_ENRICH_quant = stats::quantile(MUT_ENRICH, probs = c(0.25, 0.99))
  )
quantile MUT_ENRICH_quant
25% 0.00e+00
99% 4.63e-05

Compare the editing profiles of WT, AID only, single gRNAs only, multiplexed gRNAs only, and PmCDA1 only. Note that the single-gRNA profiles also include off-target activity.

# WT: NORM_FREQ per observed allele, one panel per reference base, with a
# horizontal threshold line. Rows with missing MUT_ENRICH are left out.
df_seq %>%
  filter(EDITOR %in% c("WT")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only, so every point keeps its true y value
  # relative to the threshold line.
  geom_jitter(height = 0) +
  # NOTE(review): hard-coded threshold; it differs from the 99% WT MUT_ENRICH
  # quantile printed in this report (4.63e-05) - confirm where it comes from.
  geom_hline(yintercept = 5.5723e-05) +
  theme_bw() +
  ggtitle("WT") +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  # y is NORM_FREQ, which is not background-subtracted (MUT_ENRICH is), so the
  # former "background normalized" label was wrong for this plot.
  ylab("Proportion of mutated bases (normalized, no background subtraction)")
## Warning: Removed 6 rows containing missing values (geom_point).

# AID: NORM_FREQ per observed allele, one panel per reference base, with a
# horizontal threshold line. Rows with missing MUT_ENRICH are left out.
df_seq %>%
  filter(EDITOR %in% c("AID")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only, so every point keeps its true y value
  # relative to the threshold line.
  geom_jitter(height = 0) +
  # NOTE(review): hard-coded threshold; it differs from the 99% WT MUT_ENRICH
  # quantile printed in this report (4.63e-05) - confirm where it comes from.
  geom_hline(yintercept = 5.5723e-05) +
  theme_bw() +
  ggtitle("AID") +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  # y is NORM_FREQ, which is not background-subtracted (MUT_ENRICH is), so the
  # former "background normalized" label was wrong for this plot.
  ylab("Proportion of mutated bases (normalized, no background subtraction)")
## Warning: Removed 5 rows containing missing values (geom_point).

# Editors "Nish", "gRNA7", "Altern", "2x" and "3x" pooled: NORM_FREQ per
# observed allele, one panel per reference base, with a horizontal threshold
# line. Rows with missing MUT_ENRICH are left out.
df_seq %>%
  filter(EDITOR %in% c("Nish", "gRNA7", "Altern", "2x", "3x")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only, so every point keeps its true y value
  # relative to the threshold line.
  geom_jitter(height = 0) +
  # NOTE(review): hard-coded threshold; it differs from the 99% WT MUT_ENRICH
  # quantile printed in this report (4.63e-05) - confirm where it comes from.
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("All gRNAs tabulated") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  # y is NORM_FREQ, which is not background-subtracted (MUT_ENRICH is), so the
  # former "background normalized" label was wrong for this plot.
  ylab("Proportion of mutated bases (normalized, no background subtraction)")
## Warning: Removed 38 rows containing missing values (geom_point).

# Single-gRNA editors ("Nish", "gRNA7", "Altern") pooled: NORM_FREQ per
# observed allele, one panel per reference base, with a horizontal threshold
# line. Rows with missing MUT_ENRICH are left out.
df_seq %>%
  filter(EDITOR %in% c("Nish", "gRNA7", "Altern")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only, so every point keeps its true y value
  # relative to the threshold line.
  geom_jitter(height = 0) +
  # NOTE(review): hard-coded threshold; it differs from the 99% WT MUT_ENRICH
  # quantile printed in this report (4.63e-05) - confirm where it comes from.
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("single gRNAs only") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  # y is NORM_FREQ, which is not background-subtracted (MUT_ENRICH is), so the
  # former "background normalized" label was wrong for this plot.
  ylab("Proportion of mutated bases (normalized, no background subtraction)")
## Warning: Removed 19 rows containing missing values (geom_point).

# Multiplexed editors ("2x", "3x") pooled: NORM_FREQ per observed allele, one
# panel per reference base, with a horizontal threshold line. Rows with missing
# MUT_ENRICH are left out.
df_seq %>%
  filter(EDITOR %in% c("2x", "3x")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only, so every point keeps its true y value
  # relative to the threshold line.
  geom_jitter(height = 0) +
  # NOTE(review): hard-coded threshold; it differs from the 99% WT MUT_ENRICH
  # quantile printed in this report (4.63e-05) - confirm where it comes from.
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("multiplexed gRNAs only") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  # y is NORM_FREQ, which is not background-subtracted (MUT_ENRICH is), so the
  # former "background normalized" label was wrong for this plot.
  ylab("Proportion of mutated bases (normalized, no background subtraction)")
## Warning: Removed 19 rows containing missing values (geom_point).

# PmCDA1: NORM_FREQ per observed allele, one panel per reference base, with a
# horizontal threshold line. Rows with missing MUT_ENRICH are left out.
df_seq %>%
  filter(EDITOR %in% c("PmCDA1")) %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only, so every point keeps its true y value
  # relative to the threshold line.
  geom_jitter(height = 0) +
  # NOTE(review): hard-coded threshold; it differs from the 99% WT MUT_ENRICH
  # quantile printed in this report (4.63e-05) - confirm where it comes from.
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("PmCDA1 only") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  # y is NORM_FREQ, which is not background-subtracted (MUT_ENRICH is), so the
  # former "background normalized" label was wrong for this plot.
  ylab("Proportion of mutated bases (normalized, no background subtraction)")
## Warning: Removed 5 rows containing missing values (geom_point).

Exclude fragment C from plots

# WT without SITE "C": NORM_FREQ per observed allele, one panel per reference
# base, with a horizontal threshold line. Rows with missing MUT_ENRICH are left
# out.
df_seq %>%
  filter(EDITOR %in% c("WT")) %>%
  filter(MUT_TYPE == "SNP") %>%
  filter(SITE != "C") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only, so every point keeps its true y value
  # relative to the threshold line.
  geom_jitter(height = 0) +
  # NOTE(review): hard-coded threshold; it differs from the 99% WT MUT_ENRICH
  # quantile printed in this report (4.63e-05) - confirm where it comes from.
  geom_hline(yintercept = 5.5723e-05) +
  theme_bw() +
  ggtitle("WT") +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  # y is NORM_FREQ, which is not background-subtracted (MUT_ENRICH is), so the
  # former "background normalized" label was wrong for this plot.
  ylab("Proportion of mutated bases (normalized, no background subtraction)")
## Warning: Removed 3 rows containing missing values (geom_point).

# AID without SITE "C": NORM_FREQ per observed allele, one panel per reference
# base, with a horizontal threshold line. Rows with missing MUT_ENRICH are left
# out.
df_seq %>%
  filter(EDITOR %in% c("AID")) %>%
  filter(MUT_TYPE == "SNP") %>%
  filter(SITE != "C") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only, so every point keeps its true y value
  # relative to the threshold line.
  geom_jitter(height = 0) +
  # NOTE(review): hard-coded threshold; it differs from the 99% WT MUT_ENRICH
  # quantile printed in this report (4.63e-05) - confirm where it comes from.
  geom_hline(yintercept = 5.5723e-05) +
  theme_bw() +
  ggtitle("AID") +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  # y is NORM_FREQ, which is not background-subtracted (MUT_ENRICH is), so the
  # former "background normalized" label was wrong for this plot.
  ylab("Proportion of mutated bases (normalized, no background subtraction)")
## Warning: Removed 3 rows containing missing values (geom_point).

# Editors "Nish", "gRNA7", "Altern", "2x" and "3x" pooled, without SITE "C":
# NORM_FREQ per observed allele, one panel per reference base, with a
# horizontal threshold line. Rows with missing MUT_ENRICH are left out.
df_seq %>%
  filter(EDITOR %in% c("Nish", "gRNA7", "Altern", "2x", "3x")) %>%
  filter(MUT_TYPE == "SNP") %>%
  filter(SITE != "C") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only, so every point keeps its true y value
  # relative to the threshold line.
  geom_jitter(height = 0) +
  # NOTE(review): hard-coded threshold; it differs from the 99% WT MUT_ENRICH
  # quantile printed in this report (4.63e-05) - confirm where it comes from.
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("All gRNAs tabulated") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  # y is NORM_FREQ, which is not background-subtracted (MUT_ENRICH is), so the
  # former "background normalized" label was wrong for this plot.
  ylab("Proportion of mutated bases (normalized, no background subtraction)")
## Warning: Removed 24 rows containing missing values (geom_point).

# Single-gRNA editors ("Nish", "gRNA7", "Altern") pooled, without SITE "C":
# NORM_FREQ per observed allele, one panel per reference base, with a
# horizontal threshold line. Rows with missing MUT_ENRICH are left out.
df_seq %>%
  filter(EDITOR %in% c("Nish", "gRNA7", "Altern")) %>%
  filter(MUT_TYPE == "SNP") %>%
  filter(SITE != "C") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only, so every point keeps its true y value
  # relative to the threshold line.
  geom_jitter(height = 0) +
  # NOTE(review): hard-coded threshold; it differs from the 99% WT MUT_ENRICH
  # quantile printed in this report (4.63e-05) - confirm where it comes from.
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("single gRNAs only") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  # y is NORM_FREQ, which is not background-subtracted (MUT_ENRICH is), so the
  # former "background normalized" label was wrong for this plot.
  ylab("Proportion of mutated bases (normalized, no background subtraction)")
## Warning: Removed 10 rows containing missing values (geom_point).

# Multiplexed editors ("2x", "3x") pooled, without SITE "C": NORM_FREQ per
# observed allele, one panel per reference base, with a horizontal threshold
# line. Rows with missing MUT_ENRICH are left out.
df_seq %>%
  filter(EDITOR %in% c("2x", "3x")) %>%
  filter(MUT_TYPE == "SNP") %>%
  filter(SITE != "C") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only, so every point keeps its true y value
  # relative to the threshold line.
  geom_jitter(height = 0) +
  # NOTE(review): hard-coded threshold; it differs from the 99% WT MUT_ENRICH
  # quantile printed in this report (4.63e-05) - confirm where it comes from.
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("multiplexed gRNAs only") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  # y is NORM_FREQ, which is not background-subtracted (MUT_ENRICH is), so the
  # former "background normalized" label was wrong for this plot.
  ylab("Proportion of mutated bases (normalized, no background subtraction)")
## Warning: Removed 14 rows containing missing values (geom_point).

# PmCDA1 without SITE "C": NORM_FREQ per observed allele, one panel per
# reference base, with a horizontal threshold line. Rows with missing
# MUT_ENRICH are left out.
df_seq %>%
  filter(EDITOR %in% c("PmCDA1")) %>%
  filter(MUT_TYPE == "SNP") %>%
  filter(SITE != "C") %>%
  mutate(REF = factor(REF, levels = c("A", "C", "G", "T")),
         ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # height = 0: jitter sideways only, so every point keeps its true y value
  # relative to the threshold line.
  geom_jitter(height = 0) +
  # NOTE(review): hard-coded threshold; it differs from the 99% WT MUT_ENRICH
  # quantile printed in this report (4.63e-05) - confirm where it comes from.
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("PmCDA1 only") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  # y is NORM_FREQ, which is not background-subtracted (MUT_ENRICH is), so the
  # former "background normalized" label was wrong for this plot.
  ylab("Proportion of mutated bases (normalized, no background subtraction)")
## Warning: Removed 2 rows containing missing values (geom_point).